#!/usr/bin/python

# Copyright  2017  Zhiyuan Tang
#            2018  Lantian Li 
# Apache 2.0.

# Generate ali.ark.tmp, target_counts and target_num from feats.len and utt2num.

import sys, collections
import os

def _read_pairs(filename):
    """Yield (first_field, second_field) for every non-blank line of filename.

    Fields are separated by any run of whitespace, so tabs and repeated
    spaces are accepted.  Blank lines are skipped.  A non-blank line with
    fewer than two fields raises IndexError.
    """
    with open(filename, 'r') as handle:
        for raw in handle:
            fields = raw.split()
            if fields:
                yield fields[0], fields[1]


def generate_targets(data_dir, out_dir):
    """Write ali.ark.tmp, target_counts and target_num into out_dir.

    Reads two whitespace-separated tables from data_dir:
      feats.len -- "<utt> <number of frames>"
      utt2num   -- "<utt> <integer label>"

    Outputs:
      ali.ark.tmp   -- one line per feats.len utterance (in file order):
                       the utterance id followed by its label repeated once
                       per frame.
      target_counts -- "[ c0 c1 ... ]" where ci is the total number of
                       frames carrying label i.
      target_num    -- the largest label found in utt2num plus one
                       ("0" when utt2num has no entries).

    Raises KeyError if an utterance with at least one frame has no entry in
    utt2num.
    """
    frame_counts = collections.OrderedDict(
        _read_pairs(os.path.join(data_dir, 'feats.len')))

    labels = {}
    max_label = -1
    for utt, label in _read_pairs(os.path.join(data_dir, 'utt2num')):
        labels[utt] = label
        # Track the maximum over every line, including ones whose utterance
        # id is repeated later in the file.
        max_label = max(max_label, int(label))

    counts = [0] * (max_label + 1)

    with open(os.path.join(out_dir, 'ali.ark.tmp'), 'w') as ali:
        for utt, n_frames in frame_counts.items():
            # A non-positive length contributes no frames at all.
            frames = max(int(n_frames), 0)
            fields = [utt]
            if frames:
                label = labels[utt]
                counts[int(label)] += frames
                # The label is written exactly as it appears in utt2num.
                fields.extend([label] * frames)
            ali.write(' '.join(fields) + '\n')

    with open(os.path.join(out_dir, 'target_counts'), 'w') as out:
        out.write('[' + ''.join(' ' + str(c) for c in counts) + ' ]')

    with open(os.path.join(out_dir, 'target_num'), 'w') as out:
        out.write(str(max_label + 1))


def main():
    """Command-line entry point: <data-dir> <output-dir>."""
    if len(sys.argv) < 3:
        sys.exit('Usage: ' + sys.argv[0] + ' <data-dir> <output-dir>')
    generate_targets(sys.argv[1], sys.argv[2])


if __name__ == '__main__':
    main()

